ps5

Problem Set 5

Siyuan Wu

Discussed with Yingnan Li and Jingyan Zhang.

Github:

library(Rcpp)
library(plotly)
Loading required package: ggplot2

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
library(ggplot2)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks plotly::filter(), stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
library(data.table)

Attaching package: 'data.table'

The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year

The following objects are masked from 'package:dplyr':

    between, first, last

The following object is masked from 'package:purrr':

    transpose
library(dplyr)

1

a
#inspired from chatgpt
# Define GCD and LCM in Rcpp.
#   gcd(a, b): greatest common divisor via the Euclidean algorithm; the result
#              is non-negative and gcd(0, 0) is 0.
#   lcm(a, b): least common multiple, non-negative; 0 when either input is 0.
# NOTE(review): cppFunction() is expected to export only the first function of
# the snippet (gcd) to R, leaving lcm as a C++-level helper -- confirm.
Rcpp::cppFunction('
int gcd(int a, int b) {
    while (b != 0) {
        int temp = b;
        b = a % b;
        a = temp;
    }
    return abs(a);
}

int lcm(int a, int b) {
    // A zero input has LCM 0; returning early also avoids dividing by
    // gcd(0, 0), which is 0.
    if (a == 0 || b == 0) {
        return 0;
    }
    // Divide before multiplying so the intermediate product cannot overflow
    // when the final result still fits in an int.
    return abs(a / gcd(a, b) * b);
}
')
# S4 class representing the rational number a / b.
#   a: numerator   (numeric slot, validated to be a whole number)
#   b: denominator (numeric slot, validated to be a non-zero whole number)
setClass("rational",
         slots = c(a = "numeric",
                   b = "numeric"))
# Validity check for the numerator and denominator.
# Signals an error (via stop) when a slot is not a single finite number,
# when the denominator is zero, or when either slot is not a whole number;
# returns TRUE otherwise.
setValidity("rational", function(object) {
  # The scalar comparisons below need exactly one value per slot; without this
  # guard an omitted slot (length 0) fails with an unrelated `if` error.
  if (length(object@a) != 1 || length(object@b) != 1) {
    stop("numerator and denominator must each be a single number")
  }
  # NA, NaN and Inf would make the comparisons below evaluate to NA.
  if (!is.finite(object@a) || !is.finite(object@b)) {
    stop("numerator and denominator must be finite, non-missing numbers")
  }
  if (object@b == 0) stop("denominator is zero")
  if (object@b %% 1 != 0 || object@a %% 1 != 0) {
    stop("numerator and denominator all need to be integer")
  }
  return(TRUE)
})
Class "rational" [in ".GlobalEnv"]

Slots:
                      
Name:        a       b
Class: numeric numeric
# show method: builds the text "<a> / <b>" from the two slots and prints it
setMethod("show", signature(object = "rational"), function(object) {
  fraction_label <- paste(object@a, "/", object@b, sep = " ")
  print(fraction_label)
})

# Declare `simplify` as an S4 generic that dispatches on its single
# argument `object`.
setGeneric(
  name = "simplify",
  def = function(object) {
    standardGeneric("simplify")
  }
)
Creating a new generic function for 'simplify' in the global environment
[1] "simplify"
# simplify method for "rational": divides both slots by their greatest common
# divisor, computed by the Rcpp-compiled gcd(), and returns the updated object.
setMethod("simplify", signature(object = "rational"), function(object) {
  common_divisor <- gcd(object@a, object@b)
  object@a <- object@a / common_divisor
  object@b <- object@b / common_divisor
  object
})

# Declare `quotient` as an S4 generic; `digits` (default 7) is passed through
# to the methods.
setGeneric(
  name = "quotient",
  def = function(object, digits = 7) standardGeneric("quotient")
)
[1] "quotient"
# quotient method for "rational": computes a / b rounded to `digits` decimal
# places, prints it, and returns the rounded value invisibly.
#   object: a "rational" object
#   digits: a single whole number of decimal places (default 7)
# Errors when `digits` is not numeric, not a single finite value, or not a
# whole number.
setMethod("quotient", "rational", function(object, digits = 7) {
  if (!is.numeric(digits)) stop("digits need to be an integer")
  # The `if` conditions need exactly one non-missing, finite value.
  if (length(digits) != 1 || !is.finite(digits)) {
    stop("digits need to be a single finite integer")
  }
  if (digits %% 1 != 0) stop("digits need to be an integer, not a double")
  res <- round(object@a / object@b, digits)
  print(res)
  # Returned invisibly: the value was already printed above, so a visible
  # return would print it a second time at top level.
  invisible(res)
})

# Addition and subtraction of two rationals.
# a1/b1 + a2/b2 = (a1*b2 + b1*a2) / (b1*b2), then reduced with simplify().
setMethod("+", signature(e1 = "rational", e2 = "rational"),
          function(e1, e2) {
            numerator <- e1@a * e2@b + e1@b * e2@a
            denominator <- e1@b * e2@b
            simplify(new("rational", a = numerator, b = denominator))
          })
# Subtraction reuses "+" by adding the operand with its numerator negated.
setMethod("-", signature(e1 = "rational", e2 = "rational"),
          function(e1, e2) {
            negated <- new("rational", a = -e2@a, b = e2@b)
            e1 + negated
          })

# Multiplication and division of two rationals; both results are reduced with
# simplify().
setMethod("*", signature(e1 = "rational", e2 = "rational"),
          function(e1, e2) {
            numerator <- e1@a * e2@a
            denominator <- e1@b * e2@b
            simplify(new("rational", a = numerator, b = denominator))
          })
# Division multiplies by the reciprocal and rejects a zero result denominator
# before the new object is built.
setMethod("/", signature(e1 = "rational", e2 = "rational"),
          function(e1, e2) {
            denominator <- e1@b * e2@a
            if (denominator == 0) {
              stop("result denominator is zero")
            }
            numerator <- e1@a * e2@b
            simplify(new("rational", a = numerator, b = denominator))
          })
b
# Example rationals used in the checks below: 24/6, 7/230 and 0/4.
r1 <- new("rational", a = 24, b = 6)
r2 <- new("rational", a = 7, b = 230)
r3 <- new("rational", a = 0, b = 4)
r1
[1] "24 / 6"
r3
[1] "0 / 4"
r1 + r2
[1] "927 / 230"
r1 - r2
[1] "913 / 230"
r1 * r2
[1] "14 / 115"
r1 / r2
[1] "920 / 7"
r1 + r3
[1] "4 / 1"
r1 * r3
[1] "0 / 1"
r2 / r3
Error in r2/r3: result denominator is zero
quotient(r1)
[1] 4
[1] 4
quotient(r2)
[1] 0.0304348
[1] 0.0304348
quotient(r2, digits = 3)
[1] 0.03
[1] 0.03
quotient(r2, digits = 3.14)
Error in quotient(r2, digits = 3.14): digits need to be an integer, not a double
quotient(r2, digits = "avocado")
Error in quotient(r2, digits = "avocado"): digits need to be an integer
q2 <- quotient(r2, digits = 3)
[1] 0.03
q2
[1] 0.03
quotient(r3)
[1] 0
[1] 0
simplify(r1)
[1] "4 / 1"
simplify(r2)
[1] "7 / 230"
simplify(r3)
[1] "0 / 1"
c
#test the denominator = zero
test1<-new("rational",a=24,b=0)
Error in validityMethod(object): denominator is zero

2

a

We retrieve the solution code from ps4.

# code from last solution given by teacher
# Read the art sales data, derive one `genre` label per row from the
# Genre___* indicator columns, and tabulate the number of rows per year/genre.
art <- read.csv("df_for_ml_improved_new_market.csv")
# Rows flagged as Painting have their Others flag reset to 0.
art$Genre___Others <- replace(art$Genre___Others, art$Genre___Painting == 1, 0)
# "Photography" is the fallback label; each later assignment overrides the
# earlier ones for the rows whose indicator equals 1.
art$genre <- "Photography"
art$genre <- replace(art$genre, art$Genre___Print == 1, "Print")
art$genre <- replace(art$genre, art$Genre___Sculpture == 1, "Sculpture")
art$genre <- replace(art$genre, art$Genre___Painting == 1, "Painting")
art$genre <- replace(art$genre, art$Genre___Others == 1, "Other")
# Year-by-genre counts, then converted to a data frame for plotting.
yeargenre <- table(year = art$year, genre = art$genre)
yearge <- data.frame(yeargenre)

We use plotly here:

# Line chart of the yearly frequency of each genre (one line per genre).
p <- layout(
  plot_ly(
    data = yearge,
    x = ~year,
    y = ~Freq,
    color = ~genre,
    type = "scatter",
    mode = "lines"
  ),
  title = "Frequency of Genre each year",
  plot_bgcolor = "#e5ecf6",
  yaxis = list(title = "frequency")
)
p
b

We reuse the code from the ps4 solution.

# Median sale price (USD) per year and genre.
artmedian <- setNames(
  aggregate(art$price_usd,
            by = list(art$year, art$genre),
            FUN = median, na.rm = TRUE),
  c("year", "genre", "price_usd_median")
)
# Median sale price (USD) per year across all genres, labelled "All".
artmedianall <- setNames(
  aggregate(art$price_usd,
            by = list(art$year),
            FUN = median, na.rm = TRUE),
  c("year", "price_usd_median")
)
artmedianall[["genre"]] <- "All"
# Stack the overall rows on top of the per-genre rows so a single data set
# can feed the plotly chart.
artmedian <- rbind(artmedianall, artmedian)
#get from chatgpt and discussed with friends
# Interactive chart of the median sale price per year, with one trace per
# value of `genre` (including the "All" rows) and a dropdown that switches
# between all traces, the overall trace only, and the per-genre traces only.
# Labels say "Median" because `price_usd_median` holds medians, not averages.
# Number of traces = number of distinct genre labels (including "All").
n_price_traces <- n_distinct(artmedian$genre)
p1 <- plot_ly(
  data = artmedian,
  x = ~year,
  y = ~price_usd_median,
  color = ~genre,
  type = "scatter",
  mode = "lines+markers",
  text = ~paste("Genre:", genre,
                "<br>Year:", year,
                "<br>Median Price (USD):", price_usd_median)
) %>%
  layout(
    title = "Change in Sales Price Over Time (Overall and By Genre)",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Median Sales Price (USD)"),
    updatemenus = list(
      list(
        type = "dropdown",
        active = 0,
        # NOTE(review): the visibility vectors assume the "All" trace is the
        # first trace (plotly appears to order colour groups alphabetically)
        # -- confirm if genre labels change.
        buttons = list(
          list(label = "show both",
               method = "update",
               args = list(list(visible = rep(TRUE, n_price_traces)),
                           list(title = "Median Sales Price Over Time (Overall and Genre)"))),
          list(label = "All",
               method = "update",
               args = list(list(visible = c(TRUE, rep(FALSE, n_price_traces - 1))),
                           list(title = "Median Sales Price Over Time Overall"))),
          list(label = "By Genre",
               method = "update",
               args = list(list(visible = c(FALSE, rep(TRUE, n_price_traces - 1))),
                           list(title = "Median Sales Price Over Time By Genre")))
        )
      )
    )
  )


p1

3

a
# Convert the flights and airports tables to data.tables.
fl <- as.data.table(flights)
ap <- as.data.table(airports)

# Mean and median departure delay per origin airport (origins with at least
# 10 flights), labelled with the airport name and sorted by descending mean.
fl[, .(mean_delay = mean(dep_delay, na.rm = TRUE),
       med_delay = median(dep_delay, na.rm = TRUE),
       numflights = .N),
   by = origin
][numflights >= 10
][ap, on = .(origin = faa)
][!is.na(mean_delay)
][order(-mean_delay), .(name, mean_delay, med_delay)]
                  name mean_delay med_delay
                <char>      <num>     <num>
1: Newark Liberty Intl   15.10795        -1
2: John F Kennedy Intl   12.11216        -1
3:          La Guardia   10.34688        -3
# Mean and median arrival delay per destination airport (destinations with at
# least 10 flights), sorted by descending mean delay.
# The per-destination summary is the `i` table of the join, so every
# destination is kept even when its code has no row in `ap`; such rows get
# their code as the name instead of being dropped.
ap[
  fl[, .(mean_delay = mean(arr_delay, na.rm = TRUE),
         med_delay = median(arr_delay, na.rm = TRUE),
         numflights = .N),
     by = dest
  ][numflights >= 10 & !is.na(mean_delay)],
  on = .(faa = dest)
][order(-mean_delay),
  # After the join `faa` carries the destination code from the summary.
  .(name = fcoalesce(name, faa), mean_delay, med_delay)]
                                    name   mean_delay med_delay
                                  <char>        <num>     <num>
 1:                Columbia Metropolitan  41.76415094      28.0
 2:                           Tulsa Intl  33.65986395      14.0
 3:                    Will Rogers World  30.61904762      16.0
 4:                 Jackson Hole Airport  28.09523810      15.0
 5:                        Mc Ghee Tyson  24.06920415       2.0
 6:               Dane Co Rgnl Truax Fld  20.19604317       1.0
 7:                        Richmond Intl  20.11125320       1.0
 8:        Akron Canton Regional Airport  19.69833729       3.0
 9:                      Des Moines Intl  19.00573614       0.0
10:                   Gerald R Ford Intl  18.18956044       1.0
11:                      Birmingham Intl  16.87732342      -2.0
12:         Theodore Francis Green State  16.23463687       1.0
13: Greenville-Spartanburg International  15.93544304      -0.5
14:    Cincinnati Northern Kentucky Intl  15.36456376      -3.0
15:            Savannah Hilton Head Intl  15.12950601      -1.0
16:          Manchester Regional Airport  14.78755365      -3.0
17:                          Eppley Afld  14.69889841      -2.0
18:                               Yeager  14.67164179      -1.5
19:                     Kansas City Intl  14.51405836       0.0
20:                          Albany Intl  14.39712919      -4.0
21:                General Mitchell Intl  14.16722038       0.0
22:                       Piedmont Triad  14.11260054      -2.0
23:               Washington Dulles Intl  13.86420212      -3.0
24:               Cherry Capital Airport  12.96842105     -10.0
25:              James M Cox Dayton Intl  12.68048606      -3.0
26:     Louisville International Airport  12.66938406      -2.0
27:                  Chicago Midway Intl  12.36422360      -1.0
28:                      Sacramento Intl  12.10992908       4.0
29:                    Jacksonville Intl  11.84483416      -2.0
30:                       Nashville Intl  11.81245891      -2.0
31:                Portland Intl Jetport  11.66040210      -4.0
32:               Greater Rochester Intl  11.56064461      -5.0
33:      Hartsfield Jackson Atlanta Intl  11.30011285      -1.0
34:                Lambert St Louis Intl  11.07846451      -3.0
35:                         Norfolk Intl  10.94909344      -4.0
36:            Baltimore Washington Intl  10.72673385      -5.0
37:                         Memphis Intl  10.64531435      -2.5
38:                   Port Columbus Intl  10.60132291      -3.0
39:                  Charleston Afb Intl  10.59296847      -4.0
40:                    Philadelphia Intl  10.12719014      -3.0
41:                  Raleigh Durham Intl  10.05238095      -3.0
42:                    Indianapolis Intl   9.94043412      -3.0
43:            Charlottesville-Albemarle   9.50000000      -5.0
44:               Cleveland Hopkins Intl   9.18161129      -5.0
45:        Ronald Reagan Washington Natl   9.06695204      -2.0
46:                      Burlington Intl   8.95099602      -4.0
47:                 Buffalo Niagara Intl   8.94595186      -5.0
48:                Syracuse Hancock Intl   8.90392501      -5.0
49:                          Denver Intl   8.60650021      -2.0
50:                      Palm Beach Intl   8.56297210      -3.0
51:                             Bob Hope   8.17567568      -3.0
52:       Fort Lauderdale Hollywood Intl   8.08212154      -3.0
53:                          Bangor Intl   8.02793296      -9.0
54:           Asheville Regional Airport   8.00383142      -1.0
55:                      Pittsburgh Intl   7.68099053      -5.0
56:                       Gallatin Field   7.60000000      -2.0
57:                 NW Arkansas Regional   7.46572581      -2.0
58:                           Tampa Intl   7.40852503      -4.0
59:               Charlotte Douglas Intl   7.36031885      -3.0
60:             Minneapolis St Paul Intl   7.27016886      -5.0
61:                      William P Hobby   7.17618819      -4.0
62:                         Bradley Intl   7.04854369     -10.0
63:                     San Antonio Intl   6.94537178      -9.0
64:                      South Bend Rgnl   6.50000000      -3.5
65:     Louis Armstrong New Orleans Intl   6.49017497      -6.0
66:                        Key West Intl   6.35294118       7.0
67:                        Eagle Co Rgnl   6.30434783      -4.0
68:                Austin Bergstrom Intl   6.01990875      -5.0
69:                   Chicago Ohare Intl   5.87661475      -8.0
70:                         Orlando Intl   5.45464309      -5.0
71:               Detroit Metro Wayne Co   5.42996346      -7.0
72:                        Portland Intl   5.14157973      -5.0
73:                        Nantucket Mem   4.85227273      -3.0
74:                      Wilmington Intl   4.63551402      -7.0
75:                    Myrtle Beach Intl   4.60344828     -13.0
76:    Albuquerque International Sunport   4.38188976      -5.5
77:         George Bush Intercontinental   4.24079040      -5.0
78:        Norman Y Mineta San Jose Intl   3.44817073      -7.0
79:               Southwest Florida Intl   3.23814963      -5.0
80:                       San Diego Intl   3.13916574      -5.0
81:              Sarasota Bradenton Intl   3.08243131      -5.0
82:            Metropolitan Oakland Intl   3.07766990      -9.0
83:   General Edward Lawrence Logan Intl   2.91439222      -9.0
84:                   San Francisco Intl   2.67289152      -8.0
85:                         Yampa Valley   2.14285714       2.0
86:              Phoenix Sky Harbor Intl   2.09704733      -6.0
87:            Montrose Regional Airport   1.78571429     -10.5
88:                     Los Angeles Intl   0.54711094      -7.0
89:               Dallas Fort Worth Intl   0.32212685      -9.0
90:                           Miami Intl   0.29905978      -9.0
91:                       Mc Carran Intl   0.25772849      -8.0
92:                  Salt Lake City Intl   0.17625459      -8.0
93:                           Long Beach  -0.06202723     -10.0
94:                Martha\\\\'s Vineyard  -0.28571429     -11.0
95:                  Seattle Tacoma Intl  -1.09909910     -11.0
96:                        Honolulu Intl  -1.36519258      -7.0
97:            John Wayne Arpt Orange Co  -7.86822660     -11.0
98:                    Palm Springs Intl -12.72222222     -13.5
                                    name   mean_delay med_delay
b
# Convert planes to a data.table, attach each flight's aircraft model via an
# inner join on tail number, and report the model with the highest average
# speed (mph) together with its number of flights.
pl <- as.data.table(planes)
fl[pl, on = .(tailnum), nomatch = NULL
][!is.na(air_time) & air_time > 0,
  # air_time is divided by 60 to turn distance / air_time into a per-hour rate
  .(avgmph = mean(distance / air_time * 60, na.rm = TRUE),
    nflights = .N),
  by = model
][order(-avgmph)
][1]
     model   avgmph nflights
    <char>    <num>    <int>
1: 777-222 482.6254        4